# Merging the plant data with the load data
library(pacman)
p_load(
  here, fst, data.table, stringr, janitor, lubridate, purrr
)

merge_plant_load = function(eload_var = 'excess_load'){
# Loading data ----------------------------------------------------------------
  print('Loading data')
  # Folder with OGE data
  oge_path = here('Data/electricity-generation/open-grid-emissions')
  # Plant level data
  oge_plant_dt = 
    rbind(
      fread(paste0(
        oge_path,
        '/2019_plant_data_hourly_us_units/individual_plant_data.csv'
      ))[,year:=2019],
      fread(paste0(
        oge_path,
        '/2020_plant_data_hourly_us_units/individual_plant_data.csv'
      ))[,year:=2020],
      fread(paste0(
        oge_path,
        '/2019_plant_data_hourly_us_units/shaped_fleet_data.csv'
      ))[,year:=2019],
      fread(paste0(
        oge_path,
        '/2020_plant_data_hourly_us_units/shaped_fleet_data.csv'
      ))[,year:=2020]
    )[,.(
      plant_id_eia, 
      datetime_utc, 
      year,
      net_generation_mwh,
      fuel_consumed_for_electricity_mmbtu,
      co2e_mass_lb_for_electricity,
      #co2_mass_lb_for_electricity,
      #ch4_mass_lb_for_electricity,
      #n2o_mass_lb_for_electricity,
      nox_mass_lb_for_electricity,
      so2_mass_lb_for_electricity
    )] |> setkey(plant_id_eia, datetime_utc)
  # Load data 
  colnames_vec_in = c('nerc_adj', 'datetime_utc', eload_var)
  oge_nerc_load_dt = 
    read.fst(
      path = here("Data/electricity-generation/clean-load/nerc-load-dt-oge.fst"),
      as.data.table = TRUE
    )[!(nerc_adj %in% c('AK','HI','ASCC')),
      ..colnames_vec_in
    ] |>
    setnames(old = eload_var, new = 'excess_load')%>%
    .[,excess_load_sq := excess_load^2] |>
    dcast(
      formula = datetime_utc ~ nerc_adj,
      value.var = c('excess_load','excess_load_sq')
    ) |> 
    clean_names() |>
    setkey(datetime_utc) 
  # Plant info table 
  oge_plant_info_dt =
    read.fst(
      path = here("Data/electricity-generation/plant-info-dt-oge.fst"),
      as.data.table = TRUE
    )[!(nerc_adj %in% c('AK','HI','ASCC')),.(
      plant_id_eia, 
      year,
      ba_code, 
      nerc_adj,
      shaped_plant_id,
      fuel_category,
      namepcap
    )] |> setkey(plant_id_eia, year)
  raw_plant_info = read.fst(
    path = here("Data/electricity-generation/plant-info-dt-oge.fst"),
    as.data.table = TRUE
  )
# Getting info for the shaped plants ------------------------------------------
  # Removing non-shaped id's, removing wind and solar
  shaped_plant_info_dt = 
    merge(
      fsetdiff(
        oge_plant_info_dt[!is.na(shaped_plant_id),.(plant_id_eia, year)],
        oge_plant_dt[,.(plant_id_eia, year)] |> unique()
      ),
      oge_plant_info_dt, 
      by = c('plant_id_eia','year')
    )[!(fuel_category %in% c('wind','solar'))]
  # Aggregating by fleet
  shaped_info_dt = 
    shaped_plant_info_dt[,.(
      namepcap = sum(namepcap)),
      by = .(
        plant_id_eia = shaped_plant_id, 
        year,
        ba_code, 
        nerc_adj,
        fuel_category
    )]|> setkey(plant_id_eia, year)
  # Adding to the plant info table
  oge_plant_info_dt = 
    funion(
      oge_plant_info_dt[,-'shaped_plant_id'],
      shaped_info_dt
    )
  write.fst(
    shaped_plant_info_dt, 
    path = here("Data/electricity-generation/shaped-plant-info-dt-oge.fst"),
  )
  write.fst(
    shaped_info_dt, 
    path = here("Data/electricity-generation/shaped-info-dt-oge.fst"),
  )
  
# Filling in missing data with zeros ------------------------------------------
  print('cleaning')
  # Min and max hours in the raw data
  plant_open_close_dt = 
    oge_plant_dt[,.(
      first_prod_date = min(datetime_utc),
      last_prod_date = max(datetime_utc),
      tot_prod = sum(net_generation_mwh)),
      keyby = .(plant_id_eia)
    ]
  plant_open_close_dt[,.(
    `Percent of plants open throughout` = sum(fifelse(
    first_prod_date >= lubridate::ymd_hms('2019-01-02 00:00:00')
    | last_prod_date <= lubridate::ymd_hms('2020-12-31 00:00:00'),
    0, tot_prod))/sum(tot_prod)
  )]
  # Creating table with every plant and hour
  plant_hour_dt = 
    map_dfr(
      1:nrow(plant_open_close_dt),
      \(i){
        data.table(
          plant_id_eia = plant_open_close_dt$plant_id_eia[i],
          datetime_utc = seq(
            plant_open_close_dt$first_prod_date[i],
            plant_open_close_dt$last_prod_date[i], 
            by = 'hour'
          )
        )
      }
    ) |> setkey(plant_id_eia, datetime_utc)
  # Merging with the raw data
  oge_plant_dt = 
    merge(
      plant_hour_dt,
      oge_plant_dt,
      by = c('plant_id_eia','datetime_utc'),
      all.x = TRUE
    )
  # Setting NAs to zero 
  oge_plant_dt[,':='(
    net_generation_mwh = fifelse(
      is.na(net_generation_mwh), 0, 
      net_generation_mwh
    ),
    fuel_consumed_for_electricity_mmbtu = fifelse(
      is.na(fuel_consumed_for_electricity_mmbtu), 0, 
      fuel_consumed_for_electricity_mmbtu
    ),
    co2e_mass_lb_for_electricity = fifelse(
      is.na(co2e_mass_lb_for_electricity), 0, 
      co2e_mass_lb_for_electricity
    ),
    nox_mass_lb_for_electricity = fifelse(
      is.na(nox_mass_lb_for_electricity), 0, 
      nox_mass_lb_for_electricity
    ),
    so2_mass_lb_for_electricity = fifelse(
      is.na(so2_mass_lb_for_electricity), 0, 
      so2_mass_lb_for_electricity
    ),
    year = fifelse(
      is.na(year), year(datetime_utc), 
      year
    )
  )]
  
# Merging together ------------------------------------------------------------
  print('merging')
  oge_elec_gen_dt = 
    merge(
      oge_nerc_load_dt,
      oge_plant_dt,
      by = 'datetime_utc'
    ) |> 
    merge(
      oge_plant_info_dt, 
      by = c('plant_id_eia','year')
      #,all.x = TRUE
      # Note that this excludes some rows from shaped_fleet_data, 
      # All of those plants are already in the individual_plant_data
    )
  # Setting production to namepcap if it exceeds it 
  oge_elec_gen_dt[,net_generation_mwh := ifelse(
    net_generation_mwh > namepcap, 
    namepcap, 
    net_generation_mwh
  )]
  
  # Net generation by energy source 
  oge_elec_gen_dt[,.(
    tot_ng = sum(net_generation_mwh),
    tot_co2 = sum(co2e_mass_lb_for_electricity),
    tot_so2 = sum(so2_mass_lb_for_electricity),
    tot_nox = sum(nox_mass_lb_for_electricity)
  ), keyby = fuel_category]
  
# Saving the results ----------------------------------------------------------
  if(str_detect(eload_var,'loss')){
    path_in = here("Data/electricity-generation/elec-gen-dt-oge-line-losses.fst")
  }else{
    path_in = here("Data/electricity-generation/elec-gen-dt-oge.fst")
  }
  write.fst(
    oge_elec_gen_dt,
    path = path_in
  )
  print('done.')
}


